********************************************************************************
*** Voting for Populism in Europe Replication Files
*** Figure 1: Economic & Social Globalization 1970-2018
*** Source: https://datacatalog.worldbank.org/dataset/world-development-indicators
*** Downloaded 20 December 2019
*** Required data files: "data/wdi/var_labels_complete.xlsx", "data/wdi/WDIData-2020.csv", and "data/wdi/wdi_labels.do"
*** 
*** Note: Requires two user-written commands: ccode (ado file from http://www.columbia.edu/~rh2883/stats.html) and carryforward (ssc install carryforward)
*** Created by: Thomas Cunningham
*** Date: 21 December 2020
******************************************************************
* Session setup
* Pin the interpreter version so later Stata releases run this file the same way
version 16.1
* Start from an empty session
clear all
* Do not pause output at screen breaks
set more off
* Raise the variable limit; the reshape to wide further down creates a very large number of columns
set maxvar 120000
* Move up one directory; the data/ and output/ paths used in this file are relative to it
cd ..
* Import variable labels
* Reads the label crosswalk (indicator code, short variable name, variable label)
* and stores it in a temporary file for the merge further down.
* The path uses forward slashes, like every other path in this file, so that it
* resolves on Windows, macOS, and Linux alike.
import excel "data/wdi/var_labels_complete.xlsx", firstrow case(lower) cellrange(a1:e1666) clear
keep icode ivname ivlabel
tempfile var_labels
save `var_labels'


* Import WDI data (first row holds the column names)
import delimited "data/wdi/WDIData-2020.csv", delimiter(",") varnames(1) clear

* Drop empty column
drop v65

* Rename year variables
* Columns v5-v64 are renamed to y<label>, where <label> is the text stored in
* each column's variable label (the year columns are referenced later as y1970, y1971, ...)
foreach yearcol of varlist v5-v64 {
	local yr : variable label `yearcol'
	rename `yearcol' y`yr'
}

* Rename key vars
* The country-name column is selected by position instead of by its imported name.
* That name can carry a stray leading non-ASCII character (presumably left by a
* byte-order mark at the start of the CSV header), so it depends on the file and
* do-file encoding and is not safe to hard-code.
* NOTE(review): assumes the country name is the first column of the CSV - confirm.
unab allvars : _all
local firstvar : word 1 of `allvars'
rename `firstvar' cname
rename countrycode wbcoded
rename indicatorname iname
rename indicatorcode icode

* Merge variable labels
* Attach the short variable name to every indicator row via the indicator code
merge m:1 icode using `var_labels', gen(labm)
* Discard crosswalk rows that have no matching indicator in the WDI data
drop if labm==2
drop labm

rename ivname vname

* Keep only the identifiers, the short variable name, and the yearly values.
* The World Bank code variable is written out in full (wbcoded) so these commands
* do not depend on variable-name abbreviation being switched on.
order cname wbcoded vname y*
keep cname wbcoded vname y*
		

		
*** SUBSET DATA AND THEN COMBINE DUE TO LARGE FILE SIZE ****

* Each subset keeps the identifiers plus one window of year columns.
* The first subset is a column range, so it relies on the current column order
* (identifiers first, then the year columns in ascending order).
* wbcoded is written out in full so nothing here depends on variable-name abbreviation.
local subset1 "cname-y1970"
local subset2 "cname wbcoded vname y1971-y1990"
local subset3 "cname wbcoded vname y1991-y2005"
local subset4 "cname wbcoded vname y2006-y2018"


forval k=1/4{

	* Work on a copy; the full dataset is restored at the end of each pass
	preserve

	* Subset to time period
	keep `subset`k''

	* Reshape to wide: one row per country, one column per year-indicator pair
	reshape wide y*, i(cname wbcoded) j(vname) string

	* Reorder variable name in order to reshape without errors (e.g., put date at the end of string name)
	rename (y*#*) (*[3]*[1]#[2])

	* Collect stubs from every variable whose name contains an underscore
	unab vars : *_*

	local stubs

	* Strip the trailing four characters (the year) from each name and keep the unique stubs
	foreach var of local vars {
		local stub = substr("`var'",1,length("`var'")-4)
		local stubs : list stubs | stub
	}
	display "`stubs'"

	* Reshape to long: one row per country-year, one column per stub
	reshape long `stubs', i(cname wbcoded) j(year)

	* Save this period to its own temporary file for the append step
	tempfile wdi_`k'
	save `wdi_`k''

	restore
}
		
		
* Stack the four period files back into a single country-year dataset
use `wdi_1', clear

foreach part of numlist 2/4 {
	append using `wdi_`part''
}

* Order rows by country name, then year
sort cname year

* Add variable labels
* Run the external label script first, then label the identifier variables here

do "data/wdi/wdi_labels.do"

label variable cname   "country"
label variable wbcoded "wbcode"
label variable year    "year"


* Convert World Bank country codes to Correlates of War (COW) codes
* (ccode is the user-written command named in the file header)
ccode wbcoded, from(WB) to(COW) gen(ccode)

label variable ccode "Correlates of War Country Code"

* List the countries that did not receive a code
tabulate cname if ccode == .

	* Manually assign some country codes
	replace ccode = 232 if cname == "Andorra"
	replace ccode = 490 if cname == "Congo, Dem. Rep."
	replace ccode = 347 if cname == "Kosovo"
	replace ccode = 341 if cname == "Montenegro"
	replace ccode = 360 if cname == "Romania"
	replace ccode = 345 if cname == "Serbia"
	replace ccode = 626 if cname == "South Sudan"
	replace ccode = 860 if cname == "Timor-Leste"
	replace ccode = 665 if cname == "West Bank and Gaza"

	* Territories that are not in COW are explicitly left without a code
	replace ccode = . if inlist(cname, "Channel Islands", "Curacao", "Isle of Man", "Sint Maarten (Dutch part)", "St. Martin (French part)")

	* Re-check what is still uncoded after the manual fixes
	tabulate cname if ccode == .


* Drop everything that has no COW code
drop if ccode == .

* Put the identifiers first
order cname ccode

* Keep observations from 1960 onward (1960 itself is retained)
keep if year >=1960

** Set cross-sectional and time-series vars
xtset ccode year, yearly


* International Migration Stock measured at five-year intervals. Intervening years filled in from prior census

* Migrant stock as a percentage of total population
gen mig_stk_pct = (pop_migrant_stock_tot/pop_total)*100
* carryforward fills each missing value from the preceding observation, so the
* within-country order is stated explicitly as (year) rather than left to
* whatever order the data happen to be in.
bys ccode (year): carryforward mig_stk_pct, gen(mig_stk)


* Flag the 23 listed COW codes and keep only those countries
gen oecd_23=0
	replace oecd_23 = 1 if inlist(ccode, 2, 20, 200, 205, 210, 211, 212, 220, 225, 230)
	replace oecd_23 = 1 if inlist(ccode, 235, 255, 305, 325, 350, 375, 380, 385, 390)
	replace oecd_23 = 1 if inlist(ccode, 395, 740, 900, 920)

keep if oecd_23==1
drop oecd_23

* Assign a labelled 1-17 country identifier for tables.
* Codes that are not listed are set to missing. Left unchanged, a small raw code
* (the list above includes code 2) would pass the 1-17 filter and be displayed
* under the value label of an unrelated country (2 = "Belgium").
recode ccode (305=1 "Austria")(211=2 "Belgium")(225=3 "Switzerland")(255=4 "Germany")(390=5 "Denmark")(230=6 "Spain")(375=7 "Finland")(220=8 "France") ///
		(200=9 "Great Britain")(350=10 "Greece")(205=11 "Ireland")(325=12 "Italy")(212=13 "Luxembourg")(210=14 "Netherlands")(385=15 "Norway")(235=16 "Portugal")(380=17 "Sweden") ///
		(else=.), gen(country)

* Keep only the 17 recoded European countries
keep if !missing(country)
keep if year >=1970
keep if year<2019 // Data not yet available for 2019-2020 as of time of analysis
drop if country == 3 | country == 13 // Switzerland, Luxembourg
* The non-missing filter above already drops unlisted countries; this line is kept as a safeguard
drop if cname == "United States" // drop US

** Generating mean values of globalization variables
* One row per year: the mean of each measure over the countries left in memory
collapse (mean) trade_gdp_pct fdi_net_inflows_gdp mig_stk, by(year)

* Short labels (shown in the figure legend)
label variable trade_gdp_pct       "Trade"
label variable fdi_net_inflows_gdp "FDI Inflows"
label variable mig_stk             "Migrant Stocks"

* Figure 1
* Three yearly mean series on one axis, with a vertical marker at 2008
twoway ///
	(line trade_gdp_pct year) ///
	(line fdi_net_inflows_gdp year, lpattern(dash)) ///
	(line mig_stk year), ///
	scheme(s1color) ///
	text(40 2007.9 "Financial Crisis", place(e) size(small)) ///
	xline(2008,lpattern(solid) lcolor(gray)) ///
	xlab(1970(5)2020) ///
	xtitle("") ///
	ytitle("% GDP/Population") ///
	title("Economic & Social Globalization, 1970-2018") ///
	subtitle("15 European Countries") ///
	note("Source: WDI, 2020", size(vsmall))

* Write the figure to disk, overwriting any previous copy
graph export "output/figure1.pdf", replace


